# R Script for Replicating the Partisans, Leaners, and Independents Visualization
# Author: William Gee
# Date: September 26, 2024
# Description: This script reads the Gallup partisan-affiliation time series (2004-2023), reshapes it, and draws a line graph of the trends in partisan affiliation over time.

# Load required libraries
library(readxl)    # For reading Excel files
library(dplyr)     # For data manipulation
library(tidyr)     # For data reshaping
library(ggplot2)   # For plotting

# Read in the data
# Uncomment the line below and replace 'path_to_file.xlsx' with the actual path
# to your Excel file. The rest of the script expects the result in `data`.
# data <- read_excel("path_to_file.xlsx")

# Check column names (optional, for verification)
# print(colnames(data))

# Select relevant columns
# Ensure that the column names match exactly with those in your dataset.
# Correct any typos, such as 'True_Idependents_6mo' to 'True_Independents_6mo'.

# Fail early with a readable message. If the read_excel() line was never run,
# `data` resolves to the function utils::data() and select() would otherwise
# stop with an opaque method-dispatch error.
if (!is.data.frame(data)) {
  stop(
    "`data` is not a data frame. Load the Excel workbook first, e.g. ",
    "data <- read_excel(\"path_to_file.xlsx\").",
    call. = FALSE
  )
}

# Columns this script needs; report every missing one at once so a misspelled
# header in the workbook is easy to spot.
required_cols <- c(
  "Date", "Month", "Year",
  "Partisans_6mo", "Leaners_6mo", "True_Independents_6mo"
)
missing_cols <- setdiff(required_cols, names(data))
if (length(missing_cols) > 0) {
  stop(
    "Missing column(s) in `data`: ", paste(missing_cols, collapse = ", "),
    ". Check the spelling of the column headers.",
    call. = FALSE
  )
}

# Wide table: one row per observation, one column per partisan category.
PLI_6mo <- data %>%
  select(Date, Month, Year, Partisans_6mo, Leaners_6mo, True_Independents_6mo)

# Convert 'Date' column to Date type
PLI_6mo$Date <- as.Date(PLI_6mo$Date, format = "%Y-%m-%d")

# Reshape data from wide to long format: the three category columns become
# (Category, Value) pairs, which is the layout the models and the plot use.
long_PLI_6mo <- PLI_6mo %>%
  pivot_longer(
    cols = c(Partisans_6mo, Leaners_6mo, True_Independents_6mo),
    names_to = "Category",
    values_to = "Value"
  )

# Ensure 'Year' is numeric so it can serve as the regression predictor.
# NOTE(review): if Year were stored as a factor this would yield level codes,
# not calendar years -- confirm the column type in the workbook.
long_PLI_6mo$Year <- as.numeric(long_PLI_6mo$Year)

# Calculate trendlines (linear models) for each category
# Each model regresses Value on Year, so its slope is the change per year.
eq_partisans <- lm(
  Value ~ Year,
  data = filter(long_PLI_6mo, Category == "Partisans_6mo")
)
eq_leaners <- lm(
  Value ~ Year,
  data = filter(long_PLI_6mo, Category == "Leaners_6mo")
)
eq_independents <- lm(
  Value ~ Year,
  data = filter(long_PLI_6mo, Category == "True_Independents_6mo")
)

# Extract the Year slope of each model, rounded to two decimals
coef_val_partisans <- round(coef(eq_partisans)[2], 2)
coef_val_leaners <- round(coef(eq_leaners)[2], 2)
coef_val_independents <- round(coef(eq_independents)[2], 2)

# Return the p-value of the `Year` slope of a fitted lm(), or NA when the
# slope could not be estimated (its row is then absent from the table).
year_p_value <- function(model) {
  coef_table <- summary(model)$coefficients
  if (!"Year" %in% rownames(coef_table)) {
    return(NA_real_)
  }
  coef_table["Year", "Pr(>|t|)"]
}

# Map a p-value to the conventional marker: "***" below 0.001, "**" below
# 0.01, "*" below 0.05, and no marker otherwise (or when the p-value is NA).
significance_stars <- function(p_value) {
  if (is.na(p_value)) {
    return("")
  }
  c("***", "**", "*", "")[findInterval(p_value, c(0.001, 0.01, 0.05)) + 1L]
}

# Create labels with regression coefficients
# The stars are derived from each model's own p-value instead of being
# hard-coded. unname() keeps the embedded number a plain constant so the
# label converts cleanly to plotmath text further down.
lab_partisans <- bquote(
  beta[Partisans] == .(unname(coef_val_partisans)) ~
    .(significance_stars(year_p_value(eq_partisans)))
)
lab_leaners <- bquote(
  beta[Leaners] == .(unname(coef_val_leaners)) ~
    .(significance_stars(year_p_value(eq_leaners)))
)
lab_independents <- bquote(
  beta[Independents] == .(unname(coef_val_independents)) ~
    .(significance_stars(year_p_value(eq_independents)))
)

# Convert a plotmath call to the text form that annotate(parse = TRUE) expects.
as_plotmath_text <- function(label) {
  paste(deparse(label), collapse = " ")
}

# Filter for January data points to plot shape indicators
# NOTE(review): assumes Month holds English month names -- confirm against
# the workbook; a numeric Month column would leave this table empty.
jan_data_PLI_6mo <- long_PLI_6mo %>% filter(Month == "January")

# Create breaks for x-axis using unique years from January data
# Missing years are dropped because "NA-01-01" cannot be converted to a Date.
unique_years <- unique(jan_data_PLI_6mo$Year)
unique_years <- unique_years[!is.na(unique_years)]
breaks_dates <- as.Date(paste0(unique_years, "-01-01"))

# Legend order. Without explicit breaks the keys follow the data order
# (alphabetical: Leaners, Partisans, True Independents), so the positional
# labels and the positional shape overrides below would be attached to the
# wrong categories.
category_levels <- c("Partisans_6mo", "Leaners_6mo", "True_Independents_6mo")
category_labels <- c("Partisans", "Leaners", "True Independents")
category_shapes <- c(18, 21, 19)

# Plotting the data
pli_plot <- ggplot(long_PLI_6mo, aes(x = Date, y = Value, color = Category)) +
  # `linewidth` replaces `size` for lines since ggplot2 3.4.0.
  geom_line(linewidth = 1) +
  # One marker per series each January.
  geom_point(data = jan_data_PLI_6mo, aes(shape = Category), size = 3) +
  # Straight-line trend per category. It is fitted against Date, while the
  # annotated slopes come from the Value ~ Year models above.
  geom_smooth(
    method = "lm", formula = y ~ x, se = FALSE, aes(group = Category)
  ) +
  xlab("Year") +
  ylab("%") +
  scale_x_date(date_labels = "%Y", breaks = breaks_dates) +
  scale_y_continuous(
    breaks = seq(0, 100, 10),
    labels = paste0(seq(0, 100, 10), "%"),
    limits = c(0, 100)
  ) +
  scale_color_manual(
    values = c(
      "Partisans_6mo" = "grey30",
      "Leaners_6mo" = "grey50",
      "True_Independents_6mo" = "grey70"
    ),
    breaks = category_levels,
    labels = category_labels,
    name = "Partisan Category"
  ) +
  scale_shape_manual(
    values = c(
      "Partisans_6mo" = 18,          # Solid diamond
      "Leaners_6mo" = 21,            # Fillable circle (hollow: no fill set)
      "True_Independents_6mo" = 19   # Solid circle
    ),
    guide = "none"  # No separate shape legend
  ) +
  theme_bw() +
  theme(
    axis.title.y = element_text(angle = 0, hjust = 0.5, vjust = 0.5),
    legend.key = element_rect(fill = "white", colour = "white"),
    axis.text.x = element_text(angle = 45, hjust = 1)
  ) +
  # Show the marker shapes inside the colour legend; the order matches
  # `category_levels` because the colour scale sets its breaks explicitly.
  guides(color = guide_legend(override.aes = list(shape = category_shapes))) +
  # Add the regression coefficient labels as plotmath text
  annotate(
    "text", x = as.Date("2023-01-01"), y = 64,
    label = as_plotmath_text(lab_partisans), parse = TRUE,
    hjust = 1, vjust = 1, size = 5
  ) +
  annotate(
    "text", x = as.Date("2023-01-01"), y = 30,
    label = as_plotmath_text(lab_leaners), parse = TRUE,
    hjust = 1, vjust = 1, size = 5
  ) +
  annotate(
    "text", x = as.Date("2021-06-01"), y = 16,
    label = as_plotmath_text(lab_independents), parse = TRUE,
    hjust = 1, vjust = 1, size = 5
  )

# Print explicitly so the figure is also drawn when the script is source()d.
print(pli_plot)

# Print summaries of the linear models (optional)
# print() is explicit because top-level values are not auto-printed when the
# script is run through source() without echo.
print(summary(eq_partisans))
print(summary(eq_leaners))
print(summary(eq_independents))

